map of distributions


knitr::include_graphics(c("/Users/devder/Desktop/bic.png"))

#Do dapc and choose groups
#K=5
#assign samples to the number of groups present in popmap, retain all PCAs
#grp<-find.clusters(gen)
#set manually the values I chose based on looking at the scree plots
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
grp<-find.clusters(gen, n.pca=94, n.clust=5)
#run dapc, retain all discriminant axes, and enough PC axes to explain 75% of variance
#dapc1<-dapc(gen, grp$grp)
#set manually the values I chose from scree plots
dapc1<-dapc(gen, grp$grp, n.da = 5, n.pca =6)
#plot compoplot
compoplot(dapc1, legend=FALSE, show.lab =TRUE, cex.names=.4, col=gg_color_hue(5))

#
post<-as.data.frame(dapc1$posterior)
post$id<-rownames(post)
plot.pies<-merge(post, locs, by= "id")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_scatterpie(aes(x=decimallongitude, y=decimallatitude, group = decimallatitude),
data = plot.pies, cols = colnames(plot.pies[,c(2:6)]))+
theme_classic()

#K=6
#set manually the values I chose based on looking at the scree plots
grp<-find.clusters(gen, n.pca=94, n.clust=6)
#set manually the values I chose from scree plots
dapc1<-dapc(gen, grp$grp, n.da = 6, n.pca =7)
#plot compoplot
compoplot(dapc1, legend=FALSE, show.lab =TRUE, cex.names=.4, col=gg_color_hue(6))

#
post<-as.data.frame(dapc1$posterior)
post$id<-rownames(post)
plot.pies<-merge(post, locs, by= "id")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_scatterpie(aes(x=decimallongitude, y=decimallatitude, group = decimallatitude),
data = plot.pies, cols = colnames(plot.pies[,c(2:7)]))+
theme_classic()

#K=7
#set manually the values I chose based on looking at the scree plots
grp<-find.clusters(gen, n.pca=94, n.clust=7)
#set manually the values I chose from scree plots
dapc1<-dapc(gen, grp$grp, n.da = 7, n.pca =8)
#plot compoplot
compoplot(dapc1, legend=FALSE, show.lab =TRUE, cex.names=.4, col=gg_color_hue(7))

#
post<-as.data.frame(dapc1$posterior)
post$id<-rownames(post)
plot.pies<-merge(post, locs, by= "id")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_scatterpie(aes(x=decimallongitude, y=decimallatitude, group = decimallatitude),
data = plot.pies, cols = colnames(plot.pies[,c(2:8)]))+
theme_classic()

#K=8
#set manually the values I chose based on looking at the scree plots
grp<-find.clusters(gen, n.pca=94, n.clust=8)
#set manually the values I chose from scree plots
dapc1<-dapc(gen, grp$grp, n.da = 8, n.pca =9)
#plot compoplot
compoplot(dapc1, legend=FALSE, show.lab =TRUE, cex.names=.4, col=gg_color_hue(8))

#
post<-as.data.frame(dapc1$posterior)
post$id<-rownames(post)
plot.pies<-merge(post, locs, by= "id")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_scatterpie(aes(x=decimallongitude, y=decimallatitude, group = decimallatitude),
data = plot.pies, cols = colnames(plot.pies[,c(2:9)]))+
theme_classic()

#K=9
#set manually the values I chose based on looking at the scree plots
grp<-find.clusters(gen, n.pca=94, n.clust=9)
#set manually the values I chose from scree plots
dapc1<-dapc(gen, grp$grp, n.da = 9, n.pca =10)
#plot compoplot
compoplot(dapc1, legend=FALSE, show.lab =TRUE, cex.names=.4, col=gg_color_hue(9))

#
post<-as.data.frame(dapc1$posterior)
post$id<-rownames(post)
plot.pies<-merge(post, locs, by= "id")
#
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_scatterpie(aes(x=decimallongitude, y=decimallatitude, group = decimallatitude),
data = plot.pies, cols = colnames(plot.pies[,c(2:10)]))+
theme_classic()

#calculate divergence btwn samples
gen@pop<-locs$subspecies
inds<-stamppNeisD(gen, pop = FALSE)
#plot tree colored by subspecies
tree <- nj(inds)
#make manual color vector
col.vec<-as.character(locs$subspecies)
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
n = 15
cols = gg_color_hue(n)
j=1
#convert subspecies to colors
for (i in levels(as.factor(col.vec))){
col.vec[col.vec == i]<-cols[j]
j<-j+1
}
#col.vec<-col.vec[c(1:19,57,20:56,58:95)]
#plot map colored by subspecies
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-123, -80), ylim = c(14, 47)) +
geom_point(data = locs, aes(x = decimallongitude, y = decimallatitude, col=subspecies), alpha =.9, size=3, show.legend=TRUE,
position = position_jitter(width = 0.3, height = 0.3)) +
theme_classic()+
guides(colour = guide_legend(override.aes = list(size = 4), order=1, label.theme = element_text(face = "italic")),
size = guide_legend(nrow = 1, order = 2))+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),legend.key=element_blank(),legend.background=element_blank())

#plot ggtree
ggtree(tree)+
geom_tippoint(color=col.vec)+
geom_tiplab(cex=2)
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

#subset genlight to only western scrub jays
wood.gen<-new("genlight",(as.matrix(gen)[locs$species == "californica" | locs$species == "woodhouseii" | locs$species == "sumichrasti", ]))
#subset loc info
wood.locs<-locs[locs$species == "californica" | locs$species == "woodhouseii" | locs$species == "sumichrasti", ]
wood.locs<-droplevels(wood.locs)
#plot map of woodfornia colored by subspecies
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-125, -95), ylim = c(15, 48)) +
geom_point(data = wood.locs, aes(x = decimallongitude, y = decimallatitude, col=subspecies), alpha =.9, size=3, show.legend=TRUE,
position = position_jitter(width = 0.3, height = 0.3)) +
theme_classic()+
guides(colour = guide_legend(override.aes = list(size = 4), order=1, label.theme = element_text(face = "italic")),
size = guide_legend(nrow = 1, order = 2))+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),legend.key=element_blank(),legend.background=element_blank())

#PCA of wood
wood.pca<-glPca(wood.gen, nf=6)
#pull pca scores out of df
wood.pca.scores<-as.data.frame(wood.pca$scores)
wood.pca.scores$subspecies<-wood.locs$subspecies
#ggplot color by subspecies
ggplot(wood.pca.scores, aes(x=PC1, y=PC2, color=subspecies)) +
geom_point(cex = 2)

#subset genlight to only california scrub jays
cali.gen<-new("genlight",(as.matrix(gen)[locs$species == "californica", ]))
#subset loc info
cali.locs<-locs[locs$species == "californica", ]
#PCA of cali
cali.pca<-glPca(cali.gen, nf=6)
#pull pca scores out of df
cali.pca.scores<-as.data.frame(cali.pca$scores)
#plot map of california colored by subspecies
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-125, -105), ylim = c(20, 50)) +
geom_point(data = cali.locs, aes(x = decimallongitude, y = decimallatitude, col=subspecies), alpha =.9, size=3, show.legend=TRUE,
position = position_jitter(width = 0.3, height = 0.3)) +
theme_classic()+
guides(colour = guide_legend(override.aes = list(size = 4), order=1, label.theme = element_text(face = "italic")),
size = guide_legend(nrow = 1, order = 2))+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),legend.key=element_blank(),legend.background=element_blank())

#ggplot color by subspecies
ggplot(cali.pca.scores, aes(x=PC1, y=PC2, color=cali.locs$subspecies)) +
geom_point(cex = 2)

#test IBD
cali.gen@pop<-cali.locs$subspecies
cali.inds<-stamppNeisD(cali.gen, pop = FALSE)
cali.inds<-as.dist(cali.inds)
#
cali.coords<-data.frame(Long=cali.locs$decimallongitude, Lat=cali.locs$decimallatitude)
#indica make dist matrix convert to km distance
cali.Dgeo <- as.dist(distm(cali.coords, fun=distGeo))
cali.Dgeo<-cali.Dgeo/1000
#run ibd test
IBD.cali <- mantel.randtest(cali.Dgeo,cali.inds)
IBD.cali
## Monte-Carlo test
## Call: mantel.randtest(m1 = cali.Dgeo, m2 = cali.inds)
##
## Observation: 0.4758088
##
## Based on 999 replicates
## Simulated p-value: 0.001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 4.586629748 0.001189826 0.010707861
#make tidy df with IBD data
ibd.df<-data.frame(species=rep("california", times=length(cali.inds)),geo.d=as.numeric(cali.Dgeo),gen.d=as.numeric(cali.inds))
#plot(cali.Dgeo,cali.inds, pch=20,cex=1.5, ylab = "Nei's Genetic Distance", xlab = "Geographic Distance (km)")
#abline(lm(cali.inds~cali.Dgeo), lty = 2)
#subset genlight to only woodhouse scrub jays
woodhouse.gen<-new("genlight",(as.matrix(gen)[locs$species == "woodhouseii" | locs$species == "sumichrasti", ]))
#subset loc info
woodhouse.locs<-locs[locs$species == "woodhouseii" | locs$species == "sumichrasti", ]
#PCA of woodhouse
woodhouse.pca<-glPca(woodhouse.gen, nf=6)
#pull pca scores out of df
woodhouse.pca.scores<-as.data.frame(woodhouse.pca$scores)
#plot map of woodhousefornia colored by subspecies
ggplot()+
geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="grey", col="black", cex=.1)+
coord_sf(xlim = c(-120, -95), ylim = c(15, 45)) +
geom_point(data = woodhouse.locs, aes(x = decimallongitude, y = decimallatitude, col=subspecies), alpha =.9, size=3, show.legend=TRUE, position = position_jitter(width = 0.3, height = 0.3)) +
theme_classic()+
guides(colour = guide_legend(override.aes = list(size = 4), order=1, label.theme = element_text(face = "italic")),
size = guide_legend(nrow = 1, order = 2))+
theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),legend.key=element_blank(),legend.background=element_blank())

#ggplot color by subspecies
ggplot(woodhouse.pca.scores, aes(x=PC1, y=PC2, color=woodhouse.locs$subspecies)) +
geom_point(cex = 2)

#test IBD
woodhouse.gen@pop<-woodhouse.locs$subspecies
inds<-stamppNeisD(woodhouse.gen, pop = FALSE)
inds<-as.dist(inds)
#
woodhouse.coords<-data.frame(Long=woodhouse.locs$decimallongitude, Lat=woodhouse.locs$decimallatitude)
#indica make dist matrix convert to km distance
wood.Dgeo <- as.dist(distm(woodhouse.coords, fun=distGeo))
wood.Dgeo<-wood.Dgeo/1000
#run ibd test
IBD <- mantel.randtest(wood.Dgeo,inds)
IBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = wood.Dgeo, m2 = inds)
##
## Observation: 0.4799699
##
## Based on 999 replicates
## Simulated p-value: 0.001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 1.082751e+01 2.759766e-04 1.962777e-03
#make tidy df with IBD data
wood.ibd.df<-data.frame(species=rep("woodhouse", times=length(inds)),geo.d=as.numeric(wood.Dgeo),gen.d=as.numeric(inds))
#subset genlight to only interior woodhouse scrub jays
int.wood.gen<-new("genlight",(as.matrix(gen)[locs$species == "woodhouseii" & locs$subspecies != "texana", ]))
#subset loc info
int.woodhouse.locs<-locs[locs$species == "woodhouseii" & locs$subspecies != "texana", ]
#test IBD
int.wood.gen@pop<-int.woodhouse.locs$subspecies
int.inds<-stamppNeisD(int.wood.gen, pop = FALSE)
int.inds<-as.dist(int.inds)
#
int.woodhouse.coords<-data.frame(Long=int.woodhouse.locs$decimallongitude, Lat=int.woodhouse.locs$decimallatitude)
#indica make dist matrix convert to km distance
int.wood.Dgeo <- as.dist(distm(int.woodhouse.coords, fun=distGeo))
int.wood.Dgeo<-int.wood.Dgeo/1000
int.wood.Dgeo<-as.dist(int.wood.Dgeo)
#run ibd test
IBD <- mantel.randtest(int.wood.Dgeo,int.inds)
IBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = int.wood.Dgeo, m2 = int.inds)
##
## Observation: 0.4565831
##
## Based on 999 replicates
## Simulated p-value: 0.001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 9.165424774 -0.001697597 0.002500103
#make df
int.wood.ibd<-data.frame(species=rep("interior woodhouse", times=length(int.inds)),geo.d=as.numeric(int.wood.Dgeo),gen.d=as.numeric(int.inds))
#combine dfs
ibdf<-rbind.data.frame(ibd.df,wood.ibd.df,int.wood.ibd)
#plot
ggplot(ibdf, aes(x=geo.d, y=gen.d, color=species))+
geom_point(alpha=.3, cex=2)+
geom_smooth(method=lm, se=FALSE, fullrange=TRUE)+
theme_classic()+
labs(x="geographic distance (km)", y="Nei's genetic distance")
## `geom_smooth()` using formula 'y ~ x'

#plot(wood.Dgeo,inds, pch=20,cex=1.5, ylab = "Nei's Genetic Distance", xlab = "Geographic Distance (km)")
#abline(lm(inds~wood.Dgeo), lty = 2)
#calc heterozygosity and make df for plotting
gen.mat<-as.matrix(gen)
loci<-rowSums(is.na(gen.mat) == FALSE)
het<-rowSums(gen.mat == 1, na.rm = TRUE)/loci
het.df<-data.frame(id=locs$id,subspecies=locs$subspecies,species=locs$species,het=het)
#plot heterozygosity as violin plots for each subspecies
ggplot(het.df, aes(x=subspecies, y=het)) +
geom_violin(trim=FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = .35, alpha=.6)+
theme_classic()+
theme(axis.text.x = element_text(face = "italic", angle = 45, hjust = 1))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

#plot heterozygosity as violin plots for each species
ggplot(het.df, aes(x=species, y=het)) +
geom_violin(trim=FALSE)+
geom_dotplot(binaxis='y', stackdir='center', dotsize = .35, alpha=.6)+
theme_classic()+
theme(axis.text.x = element_text(face = "italic", angle = 45, hjust = 1))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
